#region Using

using System;
using System.IO;
using System.Web;
using System.Net;
using System.Xml;
using System.Text.RegularExpressions;

#endregion

namespace Subkismet.Trackback
{
  /// <summary>
  /// TrackbackBase is an abstract base class to handle trackback requests.
  /// <para>
  /// Inherit from this class to filter out spammers trying to add comments
  /// to a blog post through trackback requests.
  /// </para>
  /// <para>
  /// The handler keeps the state of the request being processed in instance
  /// fields, so an instance must never serve more than one request at a time.
  /// For that reason <see cref="IsReusable"/> returns false.
  /// </para>
  /// </summary>
  public abstract class TrackbackBase : IHttpHandler
  {

    #region Private constants

    // Response body sent when the trackback was accepted.
    private const string XML_VALID = "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?><response><error>0</error></response>";
    // Response body sent when the sender already has a trackback on the post.
    private const string XML_ALREADY_REGISTERED = "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?><response><error>Trackback already registered</error></response>";
    // Matches an opening, closing or self-closing HTML tag (with optional attributes).
    private static readonly Regex REGEX_HTML = new Regex(@"</?\w+((\s+\w+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.Compiled);

    #endregion

    #region Properties

    private bool _SenderHasLink;
    /// <summary>
    /// Gets whether or not the sender has a valid link to the blog post.
    /// Only meaningful when <see cref="ExamineSendersUrl"/> is true; otherwise
    /// the sender's page is never downloaded and this stays false.
    /// </summary>
    public bool SenderHasLink
    {
      get { return _SenderHasLink; }
    }

    private bool _ExcerptContainsHtml;
    /// <summary>
    /// Gets if the excerpt contains HTML. It is considered to be spam if it does.
    /// </summary>
    public bool ExcerptContainsHtml
    {
      get { return _ExcerptContainsHtml; }
    }

    private bool _IsSenderAlreadyRegistered;
    /// <summary>
    /// Gets whether the sender already had a trackback registered on the post.
    /// </summary>
    public bool IsSenderAlreadyRegistered
    {
      get { return _IsSenderAlreadyRegistered; }
    }

    private bool _ExamineSendersUrl = true;
    /// <summary>
    /// Gets or sets whether or not to send a HTTP request to the sender's URL
    /// in order to examine it for a valid back link. Defaults to true.
    /// </summary>
    public bool ExamineSendersUrl
    {
      get { return _ExamineSendersUrl; }
      set { _ExamineSendersUrl = value; }
    }

    private TrackbackStatus _Status = TrackbackStatus.None;
    /// <summary>
    /// Gets the status of the trackback request.
    /// </summary>
    public TrackbackStatus Status
    {
      get { return _Status; }
    }

    private string _PostId;
    /// <summary>
    /// Gets the ID of the post from the request parameters.
    /// </summary>
    public string PostId
    {
      get { return _PostId; }
    }

    private string _Title;
    /// <summary>
    /// Gets the title of the senders post from the request parameters.
    /// </summary>
    public string Title
    {
      get { return _Title; }
    }

    private string _Excerpt;
    /// <summary>
    /// Gets the excerpt from the request parameters.
    /// </summary>
    public string Excerpt
    {
      get { return _Excerpt; }
    }

    private string _BlogName;
    /// <summary>
    /// Gets the name of the senders blog from the request parameters.
    /// </summary>
    public string BlogName
    {
      get { return _BlogName; }
    }

    private string _Url;
    /// <summary>
    /// Gets the URL of the senders post from the request parameters.
    /// If the parameter holds several comma separated values, only the first is used.
    /// </summary>
    public string Url
    {
      get { return _Url; }
    }

    #endregion

    #region IHttpHandler members

    /// <summary>
    /// Gets a value indicating whether another request can use the <see cref="T:System.Web.IHttpHandler"></see> instance.
    /// </summary>
    /// <value>
    /// Always false: the handler stores the data of the current request in
    /// instance fields, so sharing one instance between requests would let
    /// them overwrite each other's state.
    /// </value>
    /// <returns>true if the <see cref="T:System.Web.IHttpHandler"></see> instance is reusable; otherwise, false.</returns>
    public bool IsReusable
    {
      get { return false; }
    }

    /// <summary>
    /// Enables processing of HTTP Web requests by a custom HttpHandler that 
    /// implements the <see cref="T:System.Web.IHttpHandler"></see> interface.
    /// <para>
    /// Reads the trackback parameters (id, title, excerpt, blog_name, url) from
    /// the request. If any of them is missing the request is finished with
    /// <see cref="TrackbackStatus.Invalid"/>; otherwise the trackback is examined.
    /// </para>
    /// </summary>
    /// <param name="context">An <see cref="T:System.Web.HttpContext"></see> 
    /// object that provides references to the intrinsic server objects 
    /// (for example, Request, Response, Session, and Server) used to service HTTP requests.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    public void ProcessRequest(HttpContext context)
    {
      if (context == null)
        throw new ArgumentNullException("context");

      // Never let the outcome of an earlier request leak into this one.
      ResetRequestState();

      _PostId = context.Request.Params["id"];
      _Title = context.Request.Params["title"];
      _Excerpt = context.Request.Params["excerpt"];
      _BlogName = context.Request.Params["blog_name"];

      // The parameter may hold several comma separated values; only the first one is used.
      string rawUrl = context.Request.Params["url"];
      if (rawUrl != null)
        _Url = rawUrl.Split(',')[0];

      // Checking if the parameters are sent. If some are not, then it probably isn't spam, but the request is invalid.
      if (PostId == null || Title == null || Excerpt == null || BlogName == null || Url == null)
      {
        // Keep the Status property in sync with what is reported to Finish.
        _Status = TrackbackStatus.Invalid;
        Finish(Status);
      }
      else
      {
        ProcessTrackback(context);
      }
    }

    /// <summary>
    /// Clears everything that was derived from a previous request.
    /// The ExamineSendersUrl setting is configuration and is left untouched.
    /// </summary>
    private void ResetRequestState()
    {
      _SenderHasLink = false;
      _ExcerptContainsHtml = false;
      _IsSenderAlreadyRegistered = false;
      _Status = TrackbackStatus.None;
      _PostId = null;
      _Title = null;
      _Excerpt = null;
      _BlogName = null;
      _Url = null;
    }

    #endregion

    #region Process the trackback

    /// <summary>
    /// Processes the trackback if the request isn't invalid.
    /// It determines whether or not the request is spam or not.
    /// <para>
    /// When the post doesn't exist, or no URL is returned for it, no response
    /// is written and the status stays <see cref="TrackbackStatus.None"/>.
    /// <see cref="Finish"/> is always called as the last step.
    /// </para>
    /// </summary>
    private void ProcessTrackback(HttpContext context)
    {
      if (DoesPostExist(PostId))
      {
        string postUrl = GetPostUrl(PostId);
        if (!string.IsNullOrEmpty(postUrl))
        {
          ExamineRequest(postUrl);

          if (IsTrackbackValid())
          {
            _Status = TrackbackStatus.Valid;
          }
          else if (IsSenderAlreadyRegistered)
          {
            // A duplicate is reported as such, even if it would also qualify as spam.
            _Status = TrackbackStatus.AlreadyRegistered;
          }
          else if (!SenderHasLink || ExcerptContainsHtml)
          {
            _Status = TrackbackStatus.Spam;
          }

          SendResponse(context);
        }
      }

      Finish(Status);
    }

    /// <summary>
    /// Sends a response to the requesting client, based on the
    /// validity of the request. If it is spam, then a HTTP
    /// status code 404 will be sent. Nothing is sent for any other status.
    /// </summary>
    /// <param name="context">The context of the current request.</param>
    protected virtual void SendResponse(HttpContext context)
    {
      switch (Status)
      {
        case TrackbackStatus.Valid:
          // Headers must be set before the body is written, or they are lost when output isn't buffered.
          context.Response.ContentType = "text/xml";
          context.Response.Write(XML_VALID);
          break;

        case TrackbackStatus.AlreadyRegistered:
          context.Response.ContentType = "text/xml";
          context.Response.Write(XML_ALREADY_REGISTERED);
          break;

        case TrackbackStatus.Spam:
          context.Response.StatusCode = 404;
          break;
      }
    }

    #endregion

    #region Examines validity of the sender

    /// <summary>
    /// Returns whether or not the request is valid. A request is valid when the
    /// sender isn't registered on the post already, the excerpt holds no HTML and,
    /// if ExamineSendersUrl is true, the sender's page links back to the post.
    /// </summary>
    /// <returns>True if the request is valid; otherwise false.</returns>
    protected virtual bool IsTrackbackValid()
    {
      if (IsSenderAlreadyRegistered)
        return false;

      if (ExcerptContainsHtml)
        return false;

      if (ExamineSendersUrl && !SenderHasLink)
        return false;

      return true;
    }

    /// <summary>
    /// Analyses the request and if ExamineSendersUrl is True, then it also 
    /// examines the senders URL for the referring link.
    /// </summary>
    /// <param name="postUrl">The URL (or URL fragment) the sender's page must contain.</param>
    private void ExamineRequest(string postUrl)
    {
      if (ExamineSendersUrl)
        ConfirmSender(postUrl);

      // IsFirstTrackbackBySender is true when the sender is NOT registered yet,
      // so it has to be negated to get the "already registered" flag.
      _IsSenderAlreadyRegistered = !IsFirstTrackbackBySender();
      _ExcerptContainsHtml = ContainHtml();
    }

    /// <summary>
    /// Downloads the sender's page and checks, ignoring case, whether it
    /// contains the specified target URL. The result is stored in SenderHasLink.
    /// <para>
    /// The sender's URL comes straight from the request, so only absolute
    /// http and https addresses are fetched. Anything else, as well as any
    /// failure while downloading, counts as "no link".
    /// </para>
    /// </summary>
    /// <param name="targetUrl">The URL (or URL fragment) of the post to look for.</param>
    private void ConfirmSender(string targetUrl)
    {
      _SenderHasLink = false;

      Uri senderUri;
      if (!Uri.TryCreate(Url, UriKind.Absolute, out senderUri))
        return;

      // WebClient also understands file:// and ftp://; never let a request parameter reach those.
      if (senderUri.Scheme != Uri.UriSchemeHttp && senderUri.Scheme != Uri.UriSchemeHttps)
        return;

      try
      {
        using (WebClient client = new WebClient())
        {
          string html = client.DownloadString(senderUri);
          _SenderHasLink = html.IndexOf(targetUrl, StringComparison.OrdinalIgnoreCase) >= 0;
        }
      }
      catch (Exception)
      {
        // Deliberately best-effort: an unreachable or broken sender page
        // simply means the back link could not be confirmed.
        _SenderHasLink = false;
      }
    }

    /// <summary>
    /// Checks whether or not the excerpt of the request contains HTML.
    /// </summary>
    /// <returns>True if at least one HTML tag is found in the excerpt; otherwise false.</returns>
    private bool ContainHtml()
    {
      return REGEX_HTML.IsMatch(Excerpt);
    }

    #endregion

    #region Abstract methods

    /// <summary>
    /// Check the specified postId and return a boolean value 
    /// indicating whether or not the post exists in the data store.
    /// </summary>
    /// <param name="postId">The ID of the post that received a trackback request.</param>
    /// <returns>True if the post exists, otherwise false.</returns>
    protected abstract bool DoesPostExist(string postId);

    /// <summary>
    /// Retrieve the URL of a post with the specified postId. 
    /// This method is called for every request to an existing post,
    /// regardless of the ExamineSendersUrl property. If it returns null or
    /// an empty string, the request is not examined and no response is sent.
    /// <para>
    /// When ExamineSendersUrl is true, the returned string will be used 
    /// to confirm that the sender actually has a link to your post.
    /// </para>
    /// </summary>
    /// <param name="postId">The ID of the post that received a trackback request.</param>
    /// <returns>A valid URL or part of the URL that the senders page must contain.</returns>
    protected abstract string GetPostUrl(string postId);

    /// <summary>
    /// Checks to see if the sender has already sent a trackback to the post.
    /// If it has, there is no reason to add it again.
    /// <para>
    /// Remember that the sender is allowed to send trackback request to different
    /// posts, but not more than one per post. Therefore, only examine the post with
    /// the PostId of the request.
    /// </para>
    /// </summary>
    /// <returns>True if the sender hasn't had a trackback registered on the requested post; otherwise false</returns>
    protected abstract bool IsFirstTrackbackBySender();

    /// <summary>
    /// This method is called as the last thing after the trackback request
    /// has been processed. 
    /// <para>
    /// Use this method to redirect to the requested page or to change the response.
    /// </para>
    /// </summary>
    /// <param name="status">The status of the trackback request.</param>
    protected abstract void Finish(TrackbackStatus status);

    #endregion

    #region TrackbackStatus enum

    /// <summary>
    /// A list of statuses that is associated with a trackback request.
    /// </summary>
    public enum TrackbackStatus
    {
      /// <summary>Is used when the request is valid but the post didn't exist in the data store.</summary>
      None,
      /// <summary>Indicates that the trackback isn't spam and hasn't been registered on the requested post before.</summary>
      Valid,
      /// <summary>Indicates that the trackback has already been registered on the requested post.</summary>
      AlreadyRegistered,
      /// <summary>Indicates that the HTTP request was invalid. It didn't send all the right parameters.</summary>
      Invalid,
      /// <summary>Indicates that the trackback request comes from a spammer.</summary>
      Spam
    }

    #endregion

  }
}
